Normal Start Functions
Load isofiles
isofiles <-
file.path(
"Results",
c(
"180226_alkanes_C_GB",
"180228_alkanes_C_GB",
"180302_alkanes_C_GB"
)
) %>%
iso_read_continuous_flow()
## # A tibble: 20 x 4
## file_id type func details
## <chr> <chr> <chr> <chr>
## 1 511__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 2 511__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 3 512__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 4 512__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 5 513__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 6 513__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 7 535__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 8 535__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 9 536__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 10 536__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 11 537__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 12 537__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 13 557__CO2 zero_6V_.dxf error extract_dxf_r… cannot identify measured…
## 14 557__CO2 zero_6V_.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 15 568__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 16 568__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 17 569__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 18 569__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 19 570__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 20 570__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
Data
File Info
# all file info
isofiles %>% iso_get_file_info()
## Info: aggregating file info from 67 data file(s)
Vendor data table
isofiles %>% iso_get_vendor_data_table()
## Info: aggregating vendor data table without units from 67 data file(s)
Chromatograms
isofiles %>%
# fetch a few of the files of interst
iso_filter_files(parse_number(Analysis) %in% c(557, 580)) %>%
# plot just mass 28
iso_plot_continuous_flow_data(data = c(46)) %>%
# make interactive
ggplotly(dynamicTicks = TRUE)
## Warning: You need the dev version of ggplot2 to use `dynamicTicks`
Analysis
Select relevant data
data_table <-
isofiles %>%
# filter the files you want to use
# --> exclude CO2 zeros
iso_filter_files(!str_detect(`Identifier 1`, "CO2 zero")) %>%
# --> select only good analyses
iso_filter_files(parse_number(Analysis) > 572) %>%
# get the vendor data table and file info
iso_get_vendor_data_table(
select = c(
# peak info
Nr., Start, Rt, End,
# amplitudes and intensities
`Ampl 44`, `Ampl 46`, `Intensity All`, `Intensity 44`,
# ratios and deltas
`R 46CO2/44CO2`, `d 13C/12C`
),
include_file_info = c(
file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, `Seed Oxidation`
)
) %>%
# rename some columns to be easier to work with
rename(
Ampl44=`Ampl 44`, Ampl46=`Ampl 46`, # amplitudes
#Area28=`rIntensity 28`, Area29=`rIntensity 29`, # areas
Intensity=`Intensity All`, #peak intensities
R46C = `R 46CO2/44CO2`, d13C = `d 13C/12C`, # ratio and delta values
rt = `Rt`,
rt_start = `Start`,
rt_end = `End`
#File = `file_id`
)
## Info: applying file filter, keeping 54 of 67 files
## Info: applying file filter, keeping 8 of 54 files
## Info: aggregating vendor data table without units from 8 data file(s), including file info 'c(file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`,
## `Seed Oxidation`)'
data_table
Map peaks
### CHANGE MAPPING FILE NAME ###
metadata_samples <- read_excel(file.path("metadata", "20180226_GB_metadata.xlsx"), sheet = "files")
metadata_peak_maps <- read_excel(file.path("metadata","20180226_GB_metadata.xlsx"), sheet = "maps")
metadata_samples
data_table_with_peaks <- data_table %>%
iso_add_metadata(metadata_samples, match_by = c(`Identifier 1`, Analysis)) %>%
iso_map_peaks(metadata_peak_maps)
## Info: metadata added to 208 data rows, 0 left without metadata:
## - 1 metadata entries were mapped to 208 data entires via column 'Identifier 1'
## Info: 172 peaks in 8 files were successfully assigned, 36 could not be assigned, and 36 were missing
# missing and unidentified peaks
data_table_with_peaks %>% filter(!is_identified | is_missing)
# confirmed peaks
data_table_with_peaks <- data_table_with_peaks %>%
filter(is_identified, !is_missing, !is_ambiguous)
data_table_with_peaks
Check stability of reference peaks
The second one is the one defined to be 0 permil (so will always be), the rest is relative to that peak.
p <- data_table_with_peaks %>%
filter(is_ref_peak == "yes") %>%
ggplot() +
aes(Nr., d13C, color = file_id) +
geom_line() +
theme_bw()
ggplotly(p)
Add standard values
standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
data_w_stds <- data_table_with_peaks %>%
filter(type == "standard", is_ref_peak == "no") %>%
left_join(standards, by = "compound") %>%
mutate(is_std = !is.na(true.d13C) | !is.na(true.d2H))
Process data
Focus on the analytes and calculate a few summary parameters we want to use later.
data_w_analyte_peaks <-
data_table_with_peaks %>%
# this is important so that the reference peaks are not caught up in the next set of calculations
filter(is_ref_peak == "no") %>%
# for each analysis calculate averages across analysis
group_by(Analysis) %>%
mutate(
ampl_sample_mean.mV = mean(`Ampl44`), ampl_sample_sd.mV = sd(`Ampl44`),
area_sample_mean.Vs = mean(`Intensity 44`), area_sample_sd.Vs = sd(`Intensity 44`)
)
Standards
standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
kable(standards)
| C16 |
-9.1 |
-26.15 |
| C17 |
-117.8 |
-31.88 |
| C18 |
-52.0 |
-32.70 |
| C19 |
-56.3 |
-31.99 |
| C20 |
-89.7 |
-33.97 |
| C21 |
-177.8 |
-28.83 |
| C22 |
-81.3 |
-33.77 |
| C23 |
-67.2 |
-33.37 |
| C24 |
-29.7 |
-32.13 |
| C25 |
-263.0 |
-28.46 |
| C26 |
-45.9 |
-32.94 |
| C27 |
-172.8 |
-30.49 |
| C28 |
-36.8 |
-33.20 |
| C29 |
-177.8 |
-29.10 |
| C30 |
-213.6 |
-29.84 |
data_w_stds <-
data_w_analyte_peaks %>%
iso_add_standards(standards)
## Info: added 12 standard entries to 84 out of 84 rows
Visualize standards
v <- data_w_stds %>%
ggplot() +
aes(x = true.d13C, y = d13C, color = file_id) +
geom_smooth(method = "lm", se = FALSE, alpha = 0.5) +
geom_point() +
theme_bw() +
theme(legend.position = "none")
ggplotly(v)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
Calibration
data_w_calibs <- data_w_stds %>%
# prepare for calibration by defining the grouping column(s) and setting default parameters
iso_prepare_for_calibration(group_by = c(Analysis)) %>%
iso_set_default_process_parameters(delta_residual = resid_d13C) %>%
# run calibration
iso_calibrate_delta(model = lm(`d13C` ~ true.d13C)) %>%
# pull out some columns we want generally available
iso_unnest_calib_data(select = c(starts_with("Id"), ampl_sample_mean.mV))
## Info: preparing data for calibration by grouping based on 'Analysis' and nesting the grouped datasets into 'calibration_data'
## Info: calculating delta calibration fits based on 1 model ('lm(d13C ~ true.d13C)') for 8 data group(s) in 'calibration_data' with filter 'is_standard'; storing residuals in 'resid_d13C.'
data_w_calibs %>%
iso_unnest_delta_calib_summary(keep_other_list_data = FALSE) %>%
kable(digits =3)
| 573 |
A6 |
0.5uL |
312.947 |
0.940 |
0.933 |
0.522 |
141.261 |
0 |
2 |
-7.354 |
20.708 |
21.902 |
2.452 |
9 |
| 574 |
A6 |
0.5uL |
271.295 |
0.924 |
0.915 |
0.623 |
109.081 |
0 |
2 |
-9.300 |
24.601 |
25.794 |
3.494 |
9 |
| 575 |
A6 |
0.5uL |
315.313 |
0.950 |
0.944 |
0.509 |
171.132 |
0 |
2 |
-7.085 |
20.170 |
21.363 |
2.335 |
9 |
| 576 |
A6 |
1.0uL |
417.603 |
0.940 |
0.934 |
0.529 |
157.158 |
0 |
2 |
-8.300 |
22.600 |
24.055 |
2.802 |
10 |
| 577 |
A6 |
1.0uL |
480.845 |
0.989 |
0.986 |
0.224 |
364.108 |
0 |
2 |
1.685 |
2.629 |
2.005 |
0.200 |
4 |
| 578 |
A6 |
1.0uL |
432.851 |
0.946 |
0.940 |
0.508 |
158.919 |
0 |
2 |
-7.046 |
20.093 |
21.286 |
2.319 |
9 |
| 579 |
A6 |
1.0uL |
452.555 |
0.937 |
0.930 |
0.562 |
133.978 |
0 |
2 |
-8.162 |
22.323 |
23.517 |
2.840 |
9 |
| 580 |
A6 |
1.0uL |
426.282 |
0.950 |
0.945 |
0.479 |
172.259 |
0 |
2 |
-6.418 |
18.836 |
20.030 |
2.069 |
9 |
data_w_calibs %>%
# pull out seed oxidation in addition
iso_unnest_calib_data(select = `Seed Oxidation`) %>%
iso_unnest_delta_calib_coefs(select = c(-statistic), keep_other_list_data = FALSE) %>%
# arrange by term and Analysis to get a quick idea of what the numbers across analyses
arrange(term, Analysis) %>%
kable(digits = 2)
| 573 |
A6 |
0.5uL |
312.95 |
1 |
(Intercept) |
13.02 |
2.91 |
0 |
** |
| 574 |
A6 |
0.5uL |
271.29 |
1 |
(Intercept) |
14.72 |
3.47 |
0 |
** |
| 575 |
A6 |
0.5uL |
315.31 |
1 |
(Intercept) |
15.47 |
2.84 |
0 |
*** |
| 576 |
A6 |
1.0uL |
417.60 |
1 |
(Intercept) |
12.10 |
2.67 |
0 |
** |
| 577 |
A6 |
1.0uL |
480.85 |
1 |
(Intercept) |
9.64 |
1.55 |
0 |
** |
| 578 |
A6 |
1.0uL |
432.85 |
1 |
(Intercept) |
14.23 |
2.83 |
0 |
*** |
| 579 |
A6 |
1.0uL |
452.56 |
1 |
(Intercept) |
14.82 |
3.13 |
0 |
** |
| 580 |
A6 |
1.0uL |
426.28 |
1 |
(Intercept) |
13.56 |
2.67 |
0 |
*** |
| 573 |
A6 |
0.5uL |
312.95 |
1 |
true.d13C |
1.10 |
0.09 |
0 |
*** |
| 574 |
A6 |
0.5uL |
271.29 |
1 |
true.d13C |
1.15 |
0.11 |
0 |
*** |
| 575 |
A6 |
0.5uL |
315.31 |
1 |
true.d13C |
1.18 |
0.09 |
0 |
*** |
| 576 |
A6 |
1.0uL |
417.60 |
1 |
true.d13C |
1.07 |
0.09 |
0 |
*** |
| 577 |
A6 |
1.0uL |
480.85 |
1 |
true.d13C |
0.96 |
0.05 |
0 |
*** |
| 578 |
A6 |
1.0uL |
432.85 |
1 |
true.d13C |
1.13 |
0.09 |
0 |
*** |
| 579 |
A6 |
1.0uL |
452.56 |
1 |
true.d13C |
1.15 |
0.10 |
0 |
*** |
| 580 |
A6 |
1.0uL |
426.28 |
1 |
true.d13C |
1.11 |
0.08 |
0 |
*** |
Parameters
data_params <- data_w_calibs %>%
# pull out remaining columns we want available (some might already be pulled out but that's okay)
#Note: Garrett changed "Preparation" to "Seed Oxidation" - we may want a different shape variable
iso_unnest_calib_data(select = c(file_datetime, ampl_sample_mean.mV, `Seed Oxidation`, is_standard))
# visualize the delta calibration fits
data_params %>%
#NEED TO FILTER OUT SAMPLES iso_filter_files(`Identifier 1` == "A5")
iso_visualize_delta_calib_fits(x = Analysis, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. analysis")

data_params %>%
iso_visualize_delta_calib_fits(x = ampl_sample_mean.mV, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. amplitude")

data_params %>%
iso_visualize_delta_calib_fits(x = file_datetime, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. time")

# turn the last plot into an interactive one
ggplotly(ggplot2::last_plot() + theme(legend.position = "none"))
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
Compounds
data_w_calibs %>%
# pull out relevant data
iso_unnest_calib_data(select = everything()) %>%
filter(is_standard) %>%
# calculate deviation from means
group_by(Analysis) %>%
mutate(
`Var: residual d13C [permil]` = resid_d13C,
`Var: area diff from mean [%]` = (`Intensity 44`/mean(`Intensity 44`) - 1) * 100,
`Var: amplitude diff from mean [%]` = (`Ampl44`/mean(`Ampl44`) - 1) * 100
) %>%
# visualize
iso_visualize_data(x = compound, y = starts_with("Var"), group = Analysis, color = `Identifier 2`)

ggplotly(ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`